// gallupAlternateBeliefs2.do
// Inputs: gallup_full.dta, buyer_countyeducation, SEER_Population
// Outputs: Gallup_gw_vars_2010_2016
// Date last updated: 1/27/2025

// Step 2: This file cleans the merged dataset and adds it to the population and education data created above.  It then uses regressions to impute time-varying county-level estimates for being "worried" about climate change, as not all years are present in the data.

********************************************************************************	
* Clean output R data
********************************************************************************
// Import full Gallup data
// `clear' (not `replace') is the documented option of -use- for discarding the data in memory;
// this also matches the other -use- calls in this file.
	use "$pathi\gallup_full.dta", clear

// Filter to discard missing controls: keep only respondents whose control
// variables take one of the codes that are recoded into indicators below
	keep if inlist(employ, 1, 2, 6) 
	keep if inlist(ideolrr, 1, 2, 3)
	keep if inlist(educ, 1, 2, 3, 4)
	keep if inlist(raceban2, 1, 2, 3, 4, 7)
	keep if inlist(gr, 1, 2)
	keep if inlist(children, 1, 2)
	keep if inlist(married, 1, 2)
	 
// Recode variables into 0/1 indicators
// employ: codes 1 and 2 become 1, code 6 becomes 0
	recode employ (1 2 = 1) (6 = 0)

// Ideology dummies: ideolrr codes 1, 2, 3 in list order
	local lvl = 0
	foreach dummy in conservative moderate liberal {
		local ++lvl
		generate `dummy' = (ideolrr == `lvl')
	}

// Education dummies: educ codes 1, 2, 3, 4 in list order
	local lvl = 0
	foreach dummy in high_school some_col col_grad post_grad {
		local ++lvl
		generate `dummy' = (educ == `lvl')
	}

// Sex and children
	generate male = (gr == 1)
	generate female = (gr == 2)
	generate has_child = (children == 1)

// Race dummies: raceban2 codes are 1, 2, 3, 4 and 7 (not consecutive)
	generate white = (raceban2 == 1)
	generate black = (raceban2 == 2)
	generate hispanic = (raceban2 == 3)
	generate asian = (raceban2 == 4)
	generate other_race = (raceban2 == 7)

// Sign-flipped savings and debt items
	generate savings_i = -prssav
	generate debt_i = -prsdebt

// Minority = any raceban2 code other than 1 (white)
	generate minority = !(raceban2 == 1)

// Label variables
// minority indicator
	label define minority 0 "not_minority" 1 "minority"
	label values minority minority

// raceban2: discard the existing value label and attach a fresh one
	label drop raceban2
	label define raceban2 1 "white" 2 "black" 3 "hispanic" 4 "asian" 7 "other"
	label values raceban2 raceban2

// married: discard the existing value label, attach a 0/1 label, and
// recode the variable to 0/1 (1 stays 1, 2 becomes 0) so the values match the label
	label drop married
	label define married 0 "not_married" 1 "married"
	label values married married
	recode married (1 = 1) (2 = 0)

// has_child indicator
	label define has_child 0 "no_child" 1 "has_child"
	label values has_child has_child

// Clear the variable labels (label variable with no text removes the label)
	foreach v in married gw_serious employ income age {
		label variable `v'
	}

// Invert responses, larger values indicate greater concern
// Each item gets a companion variable <item>_i holding its negated value
	local survey_items gw_when gw_understand gw_cause gw_hurricanes envworry_air envworry_drnkwater envworry_cchange envworry_extinct envtpartic worry_unemploy worry_envt
	foreach item of local survey_items {
		generate `item'_i = -`item'
	}

// Rename the county identifier to fips_code, convert it to numeric, and save the cleaned dataset
	rename area_fips fips_code
	destring fips_code, replace
	save "$pathi\gallup_cleaned.dta", replace

********************************************************************************	
* Merge with county education and demographic variables
********************************************************************************
// Load county educational attainment and align the county key with the Gallup data
	use "$pathi\buyer_countyeducation.dta", clear
	rename buyer_fips fips_code

// Merge with SEER population data on county-year, recording the match status
	merge 1:1 year fips_code using "$pathi\SEER_Population.dta"
	rename _merge seer_acs_merge
	keep if inrange(year, 2010, 2016)

// Keep necessary vars (education shares, age shares, race shares, sex shares), save temp file
	keep fips_code year ///
		share_hs_or_less share_some_col share_bachelors share_post_grad ///
		pop_share_age_18_29 pop_share_age_30_49 pop_share_age_50_64 pop_share_age_65plus ///
		pop_share_white pop_share_Black pop_share_other pop_share_Asian pop_share_Hispanic ///
		pop_share_female pop_share_male

	tempfile population
	save `population'

// Get gallup variables 
	use "$pathi\gallup_cleaned.dta", clear

// Reverse-code the global warming worry item (1<->4, 2<->3) so that high values
// represent higher worry about global warming; any other value stays missing
	generate envworry_gw_recode = 5 - envworry_gw if inlist(envworry_gw, 1, 2, 3, 4)

// Reverse-code the gw_when item (1<->5, 2<->4, 3 unchanged); any other value stays missing
	generate gw_when_recode = 6 - gw_when if inlist(gw_when, 1, 2, 3, 4, 5)

// Keep necessary vars, merge with the county-year temp file and record the match status
	keep fips_code year state raceban2 ager male educ envworry_gw_recode gw_when_recode
	merge m:1 fips_code year using `population', generate(gallup_county_merge)

// Build a string copy of the county fips code
	generate fips_string = fips_code
	tostring fips_string, replace

// Create state fips: the leading 1 digit of a 4-character code or the leading
// 2 digits of a 5-character code (i.e. everything except the last 3 characters)
	generate fips_len = strlen(fips_string)
	generate state_fips = substr(fips_string, 1, fips_len - 3) if inlist(fips_len, 4, 5)

// Replace missing states: within each state fips, copy the state code of the
// first row that has one onto the rows where state is missing.
// countnonmissing is missing for rows without a state, so sorting on it puts
// a row with a non-missing state first within each group.
	bysort state_fips: generate long obsno = _n
	by state_fips: generate countnonmissing = sum(!missing(state)) if !missing(state)
	bysort state_fips (countnonmissing): generate firstnonmissing = state[1]
	replace state = firstnonmissing if state == .

	
********************************************************************************	
* Regression analysis for imputing
********************************************************************************
// Gw worry variable: individual-level regression of the recoded worry item on
// year, state, race, age group, sex, and education (standard errors clustered by state)
	reghdfe envworry_gw_recode ib2002.year i.state i.raceban2 i.ager male i.educ if inrange(educ, 1,4), noabsorb vce(cluster state)

// Create imputed gw worry for each year 2010 to 2016 by applying the estimated
// coefficients to the county population shares.
// raceban2 is coded 1 white, 2 black, 3 hispanic, 4 asian, 7 other, so the Asian share
// takes the i4 coefficient and the "other" share takes the i7 coefficient.
// NOTE(review): assumes ager levels 2/3/4 and educ levels 2/3/4 line up with the
// 30-49/50-64/65plus and some_col/bachelors/post_grad share variables -- confirm against the codebook.
	forval y=2010(1)2016 {
		gen imputed_gw_worry`y' = _b[i`y'.year] ///
			+ _b[i2.raceban2]*pop_share_Black ///
			+ _b[i3.raceban2]*pop_share_Hispanic ///
			+ _b[i4.raceban2]*pop_share_Asian ///
			+ _b[i7.raceban2]*pop_share_other ///
			+ _b[male]*pop_share_male ///
			+ _b[i2.educ]*share_some_col ///
			+ _b[i3.educ]*share_bachelors ///
			+ _b[i4.educ]*share_post_grad ///
			+ _b[i2.ager]*pop_share_age_30_49 ///
			+ _b[i3.ager]*pop_share_age_50_64 ///
			+ _b[i4.ager]*pop_share_age_65plus ///
			+ _b[_cons] if year==`y'

		* Add the state coefficient for each state code used in the data
		foreach x of numlist 11/16 21/27 31/34 41/48 51/59 61/64 71/78 81/85 {
			replace imputed_gw_worry`y' = imputed_gw_worry`y' + _b[i`x'.state] if state==`x'
		}
	}

// Create time varying imputed gw worry variable: each row takes the value from
// the year-specific variable that is non-missing for it
	gen imputed_gw_worry = .
	forval year=2010(1)2016 {
		replace imputed_gw_worry=imputed_gw_worry`year' if imputed_gw_worry==. & imputed_gw_worry`year' != .
	}

// Same thing for when think climate change will affect you - gw when:
// individual-level regression of the recoded gw_when item on year, state, race,
// age group, sex, and education (standard errors clustered by state)
	reghdfe gw_when_recode ib2002.year i.state i.raceban2 i.ager male i.educ if inrange(educ, 1,4), noabsorb vce(cluster state)

// Create imputed gw_when for each year 2010 to 2016 by applying the estimated
// coefficients to the county population shares.
// raceban2 is coded 1 white, 2 black, 3 hispanic, 4 asian, 7 other, so the Asian share
// takes the i4 coefficient and the "other" share takes the i7 coefficient.
// NOTE(review): assumes ager levels 2/3/4 and educ levels 2/3/4 line up with the
// 30-49/50-64/65plus and some_col/bachelors/post_grad share variables -- confirm against the codebook.
	forval y=2010(1)2016 {
		gen imputed_gw_when`y' = _b[i`y'.year] ///
			+ _b[i2.raceban2]*pop_share_Black ///
			+ _b[i3.raceban2]*pop_share_Hispanic ///
			+ _b[i4.raceban2]*pop_share_Asian ///
			+ _b[i7.raceban2]*pop_share_other ///
			+ _b[male]*pop_share_male ///
			+ _b[i2.educ]*share_some_col ///
			+ _b[i3.educ]*share_bachelors ///
			+ _b[i4.educ]*share_post_grad ///
			+ _b[i2.ager]*pop_share_age_30_49 ///
			+ _b[i3.ager]*pop_share_age_50_64 ///
			+ _b[i4.ager]*pop_share_age_65plus ///
			+ _b[_cons] if year==`y'

		* Add the state coefficient for each state code used in the data
		foreach x of numlist 11/16 21/27 31/34 41/48 51/59 61/64 71/78 81/85 {
			replace imputed_gw_when`y' = imputed_gw_when`y' + _b[i`x'.state] if state==`x'
		}
	}

// Create time varying imputed gw when variable: each row takes the value from
// the year-specific variable that is non-missing for it
	gen imputed_gw_when = .
	forval year=2010(1)2016 {
		replace imputed_gw_when=imputed_gw_when`year' if imputed_gw_when==. & imputed_gw_when`year' != .
	}

// Reduce to the county-year identifiers and the two imputed measures, restricted
// to the years for which county data exist (2010-2016)
	keep fips_code year imputed_gw_worry imputed_gw_when
	keep if inrange(year, 2010, 2016)

// One row per county-year, then rename county fips to buyer fips and save
	gduplicates drop fips_code year, force
	rename fips_code buyer_fips
	save "$pathi\Gallup_gw_vars_2010_2016.dta", replace
